package com.daervin.svc.parser;

import com.daervin.svc.common.constants.Category;
import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import com.daervin.svc.common.utils.DateTimeUtils;
import org.apache.log4j.Logger;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.selector.Selectable;

import java.util.*;

import static com.daervin.svc.common.constants.SourceEnum.JIE_MIAN;

/**
 * List-page parser for the JieMian news source.
 *
 * <p>Each {@code news-view} node on a list page is turned into a {@link NewsDTO}
 * (title, description, link, publication date). Items whose title or description
 * is truncated with {@code "..."} are not emitted; their link is queued as a
 * follow-up request instead.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class JieMianParser extends RootParser {
    private final static Logger LOGGER = Logger.getLogger(JieMianParser.class);

    /**
     * @param parentUrls URLs forwarded unchanged to {@link RootParser}
     */
    public JieMianParser(String... parentUrls) {
        super(parentUrls);
    }

    /**
     * Uses the site configuration of the parent parser as-is.
     */
    @Override
    public Site getSite() {
        return super.getSite();
    }

    /**
     * Extracts all complete news items from a list page and stores them on the page
     * under {@link Constants#PARSER_RESULT}.
     *
     * <p>A failure while handling a single item is logged and does not abort the
     * remaining items.
     *
     * @param page the downloaded list page
     */
    @Override
    public void listProcess(Page page) {
        String xpathStr = "//*[@class=\"news-list\"]//div[@class='news-view']/";
        List<Selectable> newsViewList = page.getHtml().xpath(xpathStr).nodes();

        // Snapshot "now" once so every item on this page is resolved against the same clock.
        // NOTE(review): shortParse/longParse presumably yield date-only and date-time strings -- confirm in DateTimeUtils.
        Calendar now = Calendar.getInstance();
        String today = DateTimeUtils.shortParse(now.getTime());
        String todayNow = DateTimeUtils.longParse(now.getTime());
        int currentYear = now.get(Calendar.YEAR);
        int currentMonth = now.get(Calendar.MONTH) + 1; // Calendar.MONTH is zero-based
        now.add(Calendar.DAY_OF_MONTH, -1);
        String yesterday = DateTimeUtils.shortParse(now.getTime());

        List<NewsDTO> newsList = new ArrayList<>();
        for (Selectable newsView : newsViewList) {
            try {
                List<Selectable> divList = newsView.xpath("//div[@class='text-news']/div").nodes();
                if (CollectionUtils.isEmpty(divList)) {
                    continue;
                }
                // The header, body and footer live in sibling divs, so one DTO is
                // filled incrementally while walking over them.
                NewsDTO news = new NewsDTO();
                for (Selectable item : divList) {
                    String header = item.xpath("//div[@class='news-header']/h3/a/text()").get();
                    String links = item.xpath("//div[@class='news-header']/h3/a").links().get();
                    String main = item.xpath("//div[@class='news-main']/p/text()").get();
                    String footer = item.xpath("//div[@class='news-footer']/p/span[@class='date']/text()").get();
                    if (!StringUtils.isEmpty(header)) {
                        news.setTitle(header);
                    }
                    if (!StringUtils.isEmpty(main)) {
                        news.setDesc(main);
                    }
                    if (!StringUtils.isEmpty(footer)) {
                        news.setBelongDate(
                                normalizeDate(footer, todayNow, today, yesterday, currentYear, currentMonth));
                    }
                    if (!StringUtils.isEmpty(links)) {
                        news.setLinks(links);
                    }
                }
                // Skip items that are missing any of the four mandatory fields.
                if (StringUtils.isEmpty(news.getTitle()) || StringUtils.isEmpty(news.getBelongDate())
                        || StringUtils.isEmpty(news.getLinks()) || StringUtils.isEmpty(news.getDesc())) {
                    continue;
                }

                String pageUrl = page.getRequest().getUrl();

                // Truncated content: queue the linked page as a follow-up request
                // instead of emitting the shortened text.
                if (news.getTitle().contains("...") || news.getDesc().contains("...")) {
                    Request targetRequest = new Request();
                    Map<String, Object> extras = new HashMap<>();
                    // NOTE(review): 2/1 look like category codes consumed by the follow-up handler -- confirm.
                    extras.put("category", Constants.URL_JIEMIAN_RZ.equalsIgnoreCase(pageUrl) ? 2 : 1);
                    targetRequest.setExtras(extras);
                    targetRequest.setUrl(news.getLinks());
                    page.addTargetRequest(targetRequest);
                    continue;
                }

                if (Constants.URL_JIEMIAN_KJ.equalsIgnoreCase(pageUrl)) {
                    news.setCategory(JIE_MIAN.category);
                }
                if (Constants.URL_JIEMIAN_RZ.equalsIgnoreCase(pageUrl)) {
                    news.setCategory(Category.FINANCE.ordinal());
                }
                news.setAnnouncer(JIE_MIAN.announcer);
                newsList.add(news);
            } catch (Exception e) {
                // Best effort: one malformed item must not discard the rest of the page.
                LOGGER.error("JiemianParser error", e);
            }
        }

        page.putField(Constants.PARSER_RESULT, newsList);
    }

    /**
     * Converts the date label shown under a news item into a date string.
     *
     * <ul>
     *   <li>"刚刚" / "分钟" / "小时" (just now / minutes / hours ago) map to {@code todayNow};</li>
     *   <li>"今天 ..." (today) maps to {@code today} plus the remaining text;</li>
     *   <li>"昨天 ..." (yesterday) maps to {@code yesterday} plus the remaining text;</li>
     *   <li>anything else is treated as an absolute date, see
     *       {@link #resolveAbsoluteDate(String, int, int)}.</li>
     * </ul>
     */
    private static String normalizeDate(String footer, String todayNow, String today, String yesterday,
                                        int currentYear, int currentMonth) {
        if (footer.contains("刚刚") || footer.contains("分钟") || footer.contains("小时")) {
            return todayNow;
        }
        if (footer.contains("今天")) {
            return today + " " + footer.replace("今天", "").trim();
        }
        if (footer.contains("昨天")) {
            return yesterday + " " + footer.replace("昨天", "").trim();
        }
        return resolveAbsoluteDate(footer, currentYear, currentMonth);
    }

    /**
     * Turns an absolute date label into a dash-separated date with a year.
     *
     * <p>Slashes become dashes and surrounding whitespace is dropped. A label that
     * already starts with a four-digit year is returned without a prefix. Otherwise
     * the current year is prepended, unless the leading month is later than the
     * current month, in which case the item must be from the previous year.
     *
     * @param footer       raw date label (assumed to look like {@code MM/dd HH:mm} -- TODO confirm)
     * @param currentYear  year of the crawl
     * @param currentMonth month of the crawl, 1-12
     */
    private static String resolveAbsoluteDate(String footer, int currentYear, int currentMonth) {
        String date = footer.replace("/", "-").trim();
        if (startsWithYear(date)) {
            return date;
        }
        int year = currentYear;
        int dash = date.indexOf('-');
        if (dash > 0) {
            try {
                int month = Integer.parseInt(date.substring(0, dash));
                if (month > currentMonth && month <= 12) {
                    year = currentYear - 1;
                }
            } catch (NumberFormatException ignored) {
                // Leading token is not a numeric month; fall back to the current year.
            }
        }
        return year + "-" + date;
    }

    /** Returns true when {@code date} begins with four digits followed by a dash. */
    private static boolean startsWithYear(String date) {
        if (date.length() < 5 || date.charAt(4) != '-') {
            return false;
        }
        for (int i = 0; i < 4; i++) {
            if (!Character.isDigit(date.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
